/***************************************************************************************************
** WHAT: 	Create de-normalized sampling weights for the paper
			"Institutional mistrust and child vaccination coverage in Africa",
			by Nik Stoop, Kalle Hirvonen and Jean-François Maystadt.
			Published in BMJ Global Health
			Open access through http://dx.doi.org/10.1136/bmjgh-2020-004595
							
** WHEN: 	April '21

***************************************************************************************************/


* NOTE: The do-file that compiles the analytical database (compileDB_trust vaccines_BMJ.do) runs this do-file on line 1761


clear all
set more off


*** Step 1: total number of women aged 15-49 in the population, taken from a
*** World Development Indicators (WDI) extract.
*** The extract must be stored in the same folder as the compiled DHS - AB database.
	global datacons "ADD LOCATION OF FOLDER HERE"

	* the first row of the sheet supplies the variable names
	import excel using "$datacons/Data_Extract_From_World_Development_Indicators_drop.xlsx", firstrow clear

	* force: non-numeric entries in the year columns become missing values
	destring YR*, replace force

	* restrict the extract to the 22 study countries (three-letter WDI country codes)
	local wdi_iso3 BEN BFA BDI CMR GHA GIN KEN LSO LBR MWI MLI MOZ NAM NER SEN SLE ZAF TZA TGO UGA ZMB ZWE
	tempvar in_sample
	gen byte `in_sample' = 0
	foreach iso of local wdi_iso3 {
		quietly replace `in_sample' = 1 if CountryCode == "`iso'"
	}
	keep if `in_sample'
	drop `in_sample'

	* one identifier per country x indicator row; used as the key for the reshape below
	egen ident = group(CountryCode SeriesName)

	* short labels for the three broad female age groups
	replace SeriesName = "Pop014"  if SeriesName == "Population ages 0-14, female"
	replace SeriesName = "Pop1564" if SeriesName == "Population ages 15-64, female"
	replace SeriesName = "Pop65"   if SeriesName == "Population ages 65 and above, female"

	* short labels for the five-year shares between 50 and 64 (Share5054, Share5559, Share6064)
	foreach lo of numlist 50 55 60 {
		local hi = `lo' + 4
		replace SeriesName = "Share`lo'`hi'" if SeriesName == "Population ages `lo'-`hi', female (% of female population)"
	}

	* the five-year shares between 15 and 49 are not used: drop those rows
	foreach lo of numlist 15(5)45 {
		local hi = `lo' + 4
		drop if SeriesName == "Population ages `lo'-`hi', female (% of female population)"
	}

	* wide (one YR* column per year) to long (one row per indicator and year)
	reshape long YR, i(ident) j(year)

	* spread the long YR column into one variable per indicator
	* (pop014, pop1564, pop65, share5054, share5559, share6064)
	foreach series in Pop014 Pop1564 Pop65 Share5054 Share5559 Share6064 {
		local target = lower("`series'")
		gen `target' = YR if SeriesName == "`series'"
	}

	* bring everything onto one row per country-year; this assumes each indicator
	* has a single row per country-year, so the mean simply picks up that value
	sort CountryCode year
	collapse (mean) share5054 pop014 pop1564 pop65 share5559 share6064, by(CountryCode year)

	* total female population, then the head-count in each 50-64 band
	* (shares are expressed in percent of the female population)
	gen totpop = pop014 + pop1564 + pop65
	foreach band in 5054 5559 6064 {
		gen pop`band' = totpop * share`band' / 100
	}

	* women aged 15-49 = women aged 15-64 minus the three bands between 50 and 64
	gen fem_pop_15_49 = pop1564 - pop5054 - pop5559 - pop6064

	* only the merge keys and the result are saved
	keep CountryCode year fem_pop_15_49

	* the year is matched against the DHS survey year later on
	rename year dhsyear

	sort CountryCode dhsyear

	save "$datacons/Fempop.dta", replace


*** Step 2: number of female respondents aged 15-49 per survey, from the DHS data

	use "$datacons/vaccination_data_dhs.dta", clear
	tab age_mother

	* every observation counts once; the sum per survey is the number surveyed
	gen num_fem_15_49 = 1
	collapse (first) country phase countrycode (sum) num_fem_15_49, by(country_id year)

	* restrict to the 22 study countries (two-letter DHS country codes)
	local dhs_codes BJ BF BU CM GH GN KE LS LB MW ML MZ NM NI SN SL ZA TZ TG UG ZM ZW
	tempvar in_sample
	gen byte `in_sample' = 0
	foreach cc of local dhs_codes {
		quietly replace `in_sample' = 1 if countrycode == "`cc'"
	}
	keep if `in_sample'
	drop `in_sample'

	* only the merge keys and the count are saved
	keep year countrycode num_fem_15_49
	sort countrycode year
	save "$datacons/nbr_surveyed.dta", replace


********************************************************
********************************************************
********************************************************
*** Create the sampling weight
********************************************************
********************************************************
********************************************************

* Master dataset: the DHS vaccination data

	use "$datacons/vaccination_data_dhs.dta", clear

	* CountryCode holds the World Development Indicators code that corresponds to
	* the DHS code in countrycode; the two lists below are position-matched
	local dhs_codes BJ  BF  BU  CM  GH  GN  KE  LS  LB  MW  ML  MZ  NM  NI  SN  SL  ZA  TZ  TG  UG  ZM  ZW
	local wdi_codes BEN BFA BDI CMR GHA GIN KEN LSO LBR MWI MLI MOZ NAM NER SEN SLE ZAF TZA TGO UGA ZMB ZWE

	gen CountryCode = ""
	local n_codes : word count `dhs_codes'
	forvalues k = 1/`n_codes' {
		local dhs : word `k' of `dhs_codes'
		local wdi : word `k' of `wdi_codes'
		replace CountryCode = "`wdi'" if countrycode == "`dhs'"
	}

	* observations from countries outside the list have no WDI code and are dropped
	keep if CountryCode != ""
	
	* population figures are matched on the year the survey started
	gen dhsyear = start_year
	sort CountryCode dhsyear

	* merge to get nr. of women 15-49 in population;
	* keep(master match) discards country-years that appear only in Fempop.dta
	merge m:1 CountryCode dhsyear using "$datacons/Fempop.dta", keep(master match) nogenerate

	* merge to get nr. of women 15-49 in survey
	sort countrycode year
	merge m:1 countrycode year using "$datacons/nbr_surveyed.dta", nogenerate

	* share of the population of women 15-49 that was surveyed; the de-normalized
	* weight scales the survey weight by the inverse of that share
	* (both are missing when the population figure is missing)
	gen sampling_rate = num_fem_15_49/fem_pop_15_49
	gen fancy_wgt = wgt*(1/sampling_rate)

	sum wgt fancy_wgt

	sort countrycode year

	* visual check only: open the Data Browser when Stata runs interactively in a
	* windowed session, so that batch or console runs of the master do-file are
	* not affected by this interactive command
	if "`c(mode)'" != "batch" & "`c(console)'" != "console" {
		browse countrycode year wgt fancy_wgt v002 v003
	}

	* keep only variables needed to match; the original weight is kept under the
	* name wgt_check
	keep cluster v002 v003 midx country_id start_year end_year wgt fancy_wgt
	rename wgt wgt_check
	save "$datacons/fancy_weight.dta",replace

	
*** END ***	
